# Daniel Berliner & Joachim Wehner
# "Audits for Accountability: Evidence from Municipal By-elections in South Africa"
# The Journal of Politics
# R replication code file for ancillary figures and analyses
# Use together with Berliner-Wehner_dataset.dta, sabinet_daily_counts.csv, data_for_model_of_byelect_frequency.csv, and audit_outcomes.csv
# R version 4.0.2
# Date prepared: May 14, 2021


library(readstata13)
library(texreg)
library(rdd)
library(zoo)
library(lubridate)
library(RColorBrewer)


setwd("YOUR FILE PATH HERE")

# main data file
d<-read.dta13("Berliner-Wehner_main.dta")

# print variable labels from Stata
cbind(names(d), attr(d, "var.labels"))


# Figure 1: Media attention to municipal audits

media<-read.csv("sabinet_daily_counts.csv")
media$date<-ymd(media$date)

media$news7rollsum<-c(rep(NA, 6), rollapply(media$newscount, 7, sum) )


audit_dates<-c(
"July 10, 2009", 
"June 7, 2010", 
"June 29, 2011",
"July 23, 2012",
"August 13, 2013",
"July 30, 2014",
"June 3, 2015",
"June 1, 2016",
"June 21, 2017"	)



pdf(file="figure1_mediaattention.pdf", width=12, height=5)
plot(media$date, media$news7rollsum, type="n",
	xlab="Year", ylab="Number of Articles (Seven-Day Rolling Sum)", main="", xaxt="n")
for(i in 1:length(audit_dates)){
	polygon(x=c(rep( mdy(audit_dates[i]), 2), rep( mdy(audit_dates[i])+90, 2)), y=c(-20,80,80,-20), col="gray80", border=NA)
}

lines(media$date, media$news7rollsum)
axis(side=1, at=mdy(c(
	"January 1, 2001", 
	"January 1, 2002", 
	"January 1, 2003", 
	"January 1, 2004", 
	"January 1, 2005", 
	"January 1, 2006", 
	"January 1, 2007",
	"January 1, 2008",  
	"January 1, 2009", 
	"January 1, 2010", 
	"January 1, 2011", 
	"January 1, 2012", 
	"January 1, 2013", 
	"January 1, 2014", 
	"January 1, 2015", 
	"January 1, 2016",
	"January 1, 2017")), 
labels=2001:2017 )

dev.off()







cols<-brewer.pal(9, "RdPu")

pdf(file="figureA1_mediaattentionoverlay.pdf", width=12,height=7)
plot(media$date[media$year==2009], media$news7rollsum[media$year==2009], type="n", ylim=c(0,70), xlab="Date", 
	ylab="Number of Articles (Seven-Day Rolling Sum)", xaxt="n")
rect(par("usr")[1],par("usr")[3],par("usr")[2],par("usr")[4],col = "gray80")
lines(media$date[media$year==2009], media$news7rollsum[media$year==2009], lwd=2, col=cols[1])
lines(media$date[media$year==2010]-years(1), media$news7rollsum[media$year==2010], lwd=2, col=cols[2])
lines(media$date[media$year==2011]-years(2), media$news7rollsum[media$year==2011], lwd=2, col=cols[3])
lines(media$date[media$year==2012]-years(3), media$news7rollsum[media$year==2012], lwd=2, col=cols[4])
lines(media$date[media$year==2013]-years(4), media$news7rollsum[media$year==2013], lwd=2, col=cols[5])
lines(media$date[media$year==2014]-years(5), media$news7rollsum[media$year==2014], lwd=2, col=cols[6])
lines(media$date[media$year==2015]-years(6), media$news7rollsum[media$year==2015], lwd=2, col=cols[7])
lines(media$date[media$year==2016]-years(7), media$news7rollsum[media$year==2016], lwd=2, col=cols[8])
lines(media$date[media$year==2017]-years(8), media$news7rollsum[media$year==2017], lwd=2, col=cols[9])
axis(side=1, at=mdy(c(
"January 1, 2009",
"February 1, 2009",
"March 1, 2009",
"April 1, 2009",
"May 1, 2009",
"June 1, 2009",
"July 1, 2009",
"August 1, 2009",
"September 1, 2009",
"October 1, 2009",
"November 1, 2009",
"December 1, 2009")),
labels=c("Jan.","Feb.","March","April","May","June","July","Aug.","Sept.","Oct.","Nov.","Dec."))

abline(v=mdy(c(
"July 10, 2009", 
"June 7, 2009", 
"June 29, 2009",
"July 23, 2009",
"August 13, 2009",
"July 30, 2009",
"June 3, 2009",
"June 1, 2009",
"June 21, 2009"	)), lwd=1, lty=1, col=cols)

legend(x=mdy("November 1, 2009"), y=60,
	legend=paste(2009:2017), col=cols, lwd=2)
dev.off()









# Table A3

audits<-read.csv("audit_outcomes.csv")

audittab<-as.data.frame(cbind(2008:2015,
rbind(table(audits$outcome_char[audits$year==2008]), 
	table(audits$outcome_char[audits$year==2009]),
	table(audits$outcome_char[audits$year==2010]),
	table(audits$outcome_char[audits$year==2011]),
	table(audits$outcome_char[audits$year==2012]),
	table(audits$outcome_char[audits$year==2013]),
	table(audits$outcome_char[audits$year==2014]),
	table(audits$outcome_char[audits$year==2015])	)
))
names(audittab)[1]<-"Year"
audittab$Total<-rowSums(audittab[,2:7])
audittab[,2:7]<-round(audittab[,2:7]/audittab$Total,3)
audittab$FinancialYear<-c("2006-2007",
"2007-2008",
"2008-2009",
"2009-2010",
"2010-2011",
"2011-2012",
"2012-2013",
"2013-2014")

audittab[,c("FinancialYear","CLEAN","UNQUALIFIED WITH FINDINGS","QUALIFIED","ADVERSE","DISCLAIMER","OUTSTANDING")]



# Figure A7

pdf(file="audit_outcomes_plot.pdf", width=10, height=6)
plot(2007:2014, audittab$CLEAN, type="n", ylim=c(0, 0.6), xlab="Financial year", ylab="Proportion of Audits", xaxt="n")
lines(2007:2014, audittab$CLEAN, col="green3", lwd=4)
lines(2007:2014, audittab$"UNQUALIFIED WITH FINDINGS", col="yellow2", lwd=4)
lines(2007:2014, audittab$QUALIFIED, col="purple", lwd=4)
lines(2007:2014, audittab$ADVERSE, col="pink", lwd=4)
lines(2007:2014, audittab$DISCLAIMER, col="red", lwd=4)
lines(2007:2014, audittab$OUTSTANDING, col="gray60", lwd=4)
axis(side=1, at=2007:2014, labels=audittab$FinancialYear)
legend(x=2007.5, y=0.6, lwd=rep(3,6), col=c("green3","yellow2","purple","pink","red","gray60"),
	legend=c("Clean","Unqualified","Qualified","Adverse","Disclaimer","Outstanding"), bty="n", cex=0.9)
dev.off()






# Table A4

reasontable<-as.data.frame(cbind(table(d$reasonshort), tapply(d$window90, d$reasonshort, mean)))
names(reasontable)<-c("frequency","proportion_inwindow")
reasontable[order(reasontable$frequency,decreasing=T),]





# Table A5: Balance statistics

balancetablefunction<-function(data, x){
	x1<-as.vector(x)
	output<-NULL
	for(i in 1:length(x1)){
		temp_out<-t.test(data[data$window90==0,x1[i]], data[data$window90==1,x1[i]])
		output<-rbind(output, as.vector(c(x1[i], round(temp_out$estimate,3), round(temp_out$p.value,3), symnum(temp_out$p.value, corr = FALSE, na = FALSE, cutpoints = c(0, 0.01, 0.05, 0.1, 1), symbols = c("***", "**", "*", " ")))) )
	}
	output<-as.data.frame(output)
	names(output)<-c("Variable","Mean Outside Window","Mean Inside Window","p_value","signif")
	output
}



d$contested_but_relevantpartydidntrun<-as.numeric(d$vote_party_relevant==0 & d$uncontested==0)
d$audit_outstanding<-as.numeric(d$outcome_char=="OUTSTANDING")
d$audit_disclaimer<-as.numeric(d$outcome_char=="DISCLAIMER")
d$audit_adverse<-as.numeric(d$outcome_char=="ADVERSE")
d$audit_qualified<-as.numeric(d$outcome_char=="QUALIFIED")
d$audit_unqualified<-as.numeric(d$outcome_char=="UNQUALIFIED WITH FINDINGS")
d$audit_clean<-as.numeric(d$outcome_char=="CLEAN")
d$outgoingparty_ANC<-as.numeric(d$outgoing_party=="AFRICAN NATIONAL CONGRESS")
d$relevantparty_ANC<-as.numeric(d$party_relevant=="ANC")
d$metro<-as.numeric(d$code %in% c("BUF","CPT","EKU","ETH","JHB","MAN","NMA","TSH"))


vars<-c( # in alphabetical order of labels
"better_time",
"outcome_change",
"worse_time",
"audit_adverse",
"audit_clean",
"audit_disclaimer",
"audit_outstanding",
"audit_qualified",
"audit_unqualified",
"barriers_index",
"uncontested",
"edu_attending2011",
"seatdiff",
"contested_but_relevantpartydidntrun",
"relevantparty_ANC",
"party_largest_seatshare",
"lingfrac",
"logincome",
"logpop",
"localmedia",
"metro",
"numparties",
"anc_share",
"localelec_vote_party_relevant",
"localelec_turnoutpct",
"outgoingparty_ANC",
"localelec_winpct",
"toilet_flush2011",
"tech_internet2011",
"edu_matric_higher2011",
"tech_radio2011",
"water_home2011",
"pop_white2011"
)


newbal_all<-balancetablefunction(d[d$year>=2008 ,], x=vars)
newbal_deathsonly<-balancetablefunction(d[d$year>=2008 & d$reasonshort=="death",], x=vars)

cbind(newbal_all, newbal_deathsonly)





# Figure A6: McCrary sorting test

d1<-d[d$year>=2009,]

pdf(file="sorting_test_plot.pdf", width=12, height=6)
par(mfrow=c(1,2))
DCdensity(d1$datefromrelease, 0)
title(main = paste("All By-Elections\np-value = ",round(DCdensity(d1$datefromrelease, 0, plot=F),6),sep=""), 
	xlab = "Days to/from Audit Release")
DCdensity(d1$datefromrelease[d1$reasonshort=="death"], 0) 
title(main = paste("By-Elections from Death Only\np-value = ",round(DCdensity(d1$datefromrelease[d1$reasonshort=="death"], 0, plot=F),3),sep=""), 
	xlab = "Days to/from Audit Release")
dev.off()

round(DCdensity(d1$datefromrelease, 0, plot=F),3)
round(DCdensity(d1$datefromrelease[d1$reasonshort=="death"], 0, plot=F),3)








# Table A17: Frequency of by-elections triggered by death across municipalities

freqdata<-read.csv("data_for_model_of_byelect_frequency.csv")

m1<-glm(death_elex ~ 
	log(S_Total)+
	toilet_flush2011+
	hosp
	, family = quasipoisson, data=freqdata)

m2<-glm(death_elex ~ 
	log(S_Total)+
	toilet_flush2011+
	hosp+
	as.factor(Province)
	, family = quasipoisson, data=freqdata)

m3<-glm(death_elex ~ 
	log(S_Total)+
	water_home2011+
	hosp+
	as.factor(Province)
	, family = quasipoisson, data=freqdata)

m4<-glm(death_elex ~ 
	log(S_Total)+
	toilet_flush2011+
	bighosp+
	as.factor(Province)
	, family = quasipoisson, data=freqdata)

m5<-glm(as.numeric(death_elex>0) ~ 
	log(S_Total)+
	toilet_flush2011+
	hosp+
	as.factor(Province)
	, family = binomial(link="logit"), data=freqdata)


screenreg(list(m1,m2,m3,m4,m5),
digits=3,  stars = c(0.01, 0.05, 0.1), omit=c("as.factor"))



